install.packages("readxl")
library(readxl)
# Load the balance-sheet workbook and list its column names.
df <- read_excel("italy_balances.xlsx")
names(df)
# Drop the first four columns.
df <- df[-(1:4)]
Una prima possibilità è concentrarci solo sugli indici di bilancio
# Keep only the five ratio columns (ROE, ROI, ROS, ROT, ROA) and summarise them.
df2 <- df[
  c(
    "R.O.E. (Return on Equity)",
    "R.O.I. Cerved (Return on Investment)",
    "R.O.S. (Return on Sales)",
    "ROT (Return On Turnover) FATTURATO / ATTIVO SP",
    "R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP"
  )
]
summary(df2)
R.O.E. (Return on Equity) R.O.I. Cerved (Return on Investment) R.O.S. (Return on Sales) ROT (Return On Turnover) FATTURATO / ATTIVO SP
Min. :-945.92 Min. :-80.330 Min. :-889.6400 Min. :0.01636
1st Qu.: 0.17 1st Qu.: 0.780 1st Qu.: 1.1800 1st Qu.:1.34801
Median : 14.43 Median : 6.440 Median : 5.0700 Median :2.24448
Mean : 14.89 Mean : 9.336 Mean : -0.5772 Mean :2.35798
3rd Qu.: 46.00 3rd Qu.: 17.260 3rd Qu.: 10.0600 3rd Qu.:3.16350
Max. : 475.94 Max. : 65.150 Max. : 89.3500 Max. :9.06781
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP
Min. :-80.500
1st Qu.: 0.880
Median : 5.640
Mean : 8.688
3rd Qu.: 16.090
Max. : 62.820
install.packages("plotly")
apertura URL 'https://cran.rstudio.com/bin/macosx/contrib/4.2/plotly_4.10.3.tgz'
Content type 'application/x-gzip' length 3202729 bytes (3.1 MB)
==================================================
downloaded 3.1 MB
I pacchetti binari scaricati sono in
/var/folders/1m/gxy5wsh93qq112t_f6ddt48w0000gn/T//Rtmp6IaPTV/downloaded_packages
library(plotly)
Caricamento del pacchetto richiesto: ggplot2
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Caricamento pacchetto: ‘plotly’
Il seguente oggetto è mascherato da ‘package:ggplot2’:
last_plot
Il seguente oggetto è mascherato da ‘package:stats’:
filter
Il seguente oggetto è mascherato da ‘package:graphics’:
layout
# Pairwise plots of all selected ratios.
plot(df2)
# Explore the dataset a bit: interactive scatter of ROI against ROA.
# Columns are referenced through `data` with the formula interface, and the
# scatter mode is stated explicitly so plotly does not have to guess it.
plot_ly(
  data = df2,
  x = ~`R.O.I. Cerved (Return on Investment)`,
  y = ~`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`,
  type = "scatter",
  mode = "markers"
)
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Splitting
# 80/20 train/test split, seeded so the partition is reproducible.
set.seed(0)
n_obs <- nrow(df2)
# seq_len() is safe for zero rows; floor() makes the integer sample size explicit.
splitting <- sample(seq_len(n_obs), floor(0.8 * n_obs))
train_data <- df2[splitting, ]
# Select the hold-out rows positively: `df2[-splitting, ]` would return zero
# rows (instead of every row) if `splitting` were ever empty.
test_data <- df2[setdiff(seq_len(n_obs), splitting), ]
Prima regressione
# Simple linear regression of ROI on ROA, fitted on the training set.
# Columns are referenced by bare (backticked) name so that `data` supplies
# them. Writing `train_data$...` inside the formula makes predict() ignore
# `newdata` and return values for the training rows instead.
model <- lm(
  `R.O.I. Cerved (Return on Investment)` ~
    `R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`,
  data = train_data
)
summary(model)
Call:
lm(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~
train_data$`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`,
data = train_data)
Residuals:
Min 1Q Median 3Q Max
-79.553 -1.072 -0.299 1.011 35.456
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.05846 0.23495 4.505 7.82e-06 ***
train_data$`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP` 0.96170 0.01409 68.271 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.275 on 674 degrees of freedom
Multiple R-squared: 0.8737, Adjusted R-squared: 0.8735
F-statistic: 4661 on 1 and 674 DF, p-value: < 2.2e-16
Analizziamo la regressione
install.packages("lmtest")
library(lmtest)
install.packages("mvinfluence")
library(mvinfluence)
library(car)
# 1. Linearity of the data (Residuals vs Fitted values)
plot(model)
# 2. Independence of the residuals
# Durbin-Watson test to check whether the errors are correlated
dwtest(model, alternative = "two.sided")
Durbin-Watson test
data: model
DW = 1.9611, p-value = 0.6117
alternative hypothesis: true autocorrelation is not 0
# 3. Normal distribution of the residuals
# The Shapiro-Wilk test is run on the residuals to check the distribution of the errors.
# To be read together with the Normal Q-Q plot.
shapiro.test(model$residuals)
Shapiro-Wilk normality test
data: model$residuals
W = 0.56165, p-value < 2.2e-16
# Homoskedasticity vs heteroskedasticity: Breusch-Pagan test (H1: non-constant error variance)
# To be read together with the Scale-Location plot
# and with the Residuals vs Fitted plot.
bptest(model) # a very low p-value would reject H0 (constant variance); the recorded run gave p = 0.1107, so H0 is not rejected at the 5% level
studentized Breusch-Pagan test
data: model
BP = 2.5448, df = 1, p-value = 0.1107
# 4. Analysis of extreme values: outliers (in Y) and high-leverage observations (in X)
influencePlot(model)
influenceIndexPlot(model)
infIndexPlot(model)
barplot(cooks.distance(model))
# 5. Multicollinearity (a VIF above 10 for a variable is considered problematic)
# Variance Inflation Factor
# NOTE(review): presumably left disabled because this model has a single predictor — confirm.
#vif(model)
Opterei per eliminarli perché danno problemi anche sul Q-Q plot e sul test di Shapiro
# Inspect the observations singled out as influential.
# The model was fitted on train_data, and tibbles do not keep the original row
# names when subset, so observation numbers reported by the diagnostics are
# positions within train_data rather than within df2.
# NOTE(review): assumes these indices were read off the influence plots — confirm.
train_data[c(284, 468, 478, 616), ]
Con tutti i predittori per confronto anche con NN
# Multiple regression of ROI on all remaining ratios in the training set.
# The response is named as a column of `data` (not as `train_data$...`), so the
# fitted model depends only on the `data` argument and `.` expands to every
# other column.
model2 <- lm(`R.O.I. Cerved (Return on Investment)` ~ ., data = train_data)
summary(model2)
Call:
lm(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~
., data = train_data)
Residuals:
Min 1Q Median 3Q Max
-79.239 -1.179 -0.113 1.064 35.570
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.086436 0.406027 0.213 0.83148
`R.O.E. (Return on Equity)` -0.005140 0.003192 -1.610 0.10778
`R.O.S. (Return on Sales)` -0.001776 0.004370 -0.406 0.68464
`ROT (Return On Turnover) FATTURATO / ATTIVO SP` 0.427092 0.147208 2.901 0.00384 **
`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP` 0.967643 0.016759 57.740 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.246 on 671 degrees of freedom
Multiple R-squared: 0.8756, Adjusted R-squared: 0.8749
F-statistic: 1181 on 4 and 671 DF, p-value: < 2.2e-16
summary(model2)[["sigma"]]^2 # squared residual standard error, used here as the MSE
[1] 27.51841
# Diagnostic plots for the full model.
plot(model2)
# Influence plot for the same model (previously pointed at `model`, the
# single-predictor fit, which had already been inspected).
influencePlot(model2)
NA
proviamo le predizioni con il test_data
# Side-by-side comparison of the observed ROI with the predictions of both
# linear models on the hold-out set.
# predict() only uses `newdata` when the model formula refers to columns by
# bare name.
test <- data.frame(
  actual = test_data$`R.O.I. Cerved (Return on Investment)`,
  preds1 = predict(model, newdata = test_data),
  preds2 = predict(model2, newdata = test_data)
)
Avvertimento: 'newdata' ha 169 righe ma la variabile trovata ha 676 righe
# Show the comparison table.
print(test)
Il secondo modello è decisamente migliore del primo nel fare predizioni, ma spesso si discosta parecchio dai valori reali
library(ggplot2)
library(lattice)
library(caret)
library(rpart)
library(rpart.plot)
# Regression tree for ROI on all remaining ratios, with complexity parameter 0.01.
# The response is named as a column of `data` (not as `train_data$...`), so the
# fitted tree depends only on the `data` argument.
model_dt <- rpart(
  `R.O.I. Cerved (Return on Investment)` ~ .,
  data = train_data,
  cp = 0.01
)
print(model_dt)
n= 676
node), split, n, deviance, yval
* denotes terminal node
1) root 676 148460.1000 9.1462130
2) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 15.07 500 53405.7000 2.8620800
4) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< -28.435 8 2823.7820 -46.5100000 *
5) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=-28.435 492 30764.0100 3.6648780
10) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 3.795 271 13396.0400 -0.8597048
20) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< -4.845 49 1220.0290 -7.3075510 *
21) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=-4.845 222 9689.2050 0.5634685 *
11) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=3.795 221 5017.0580 9.2131220
22) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 9.075 127 2120.6550 6.8003150 *
23) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=9.075 94 1158.1480 12.4729800 *
3) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=15.07 176 19215.0600 26.9988600
6) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 30.945 133 3815.9480 22.3776700
12) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 22.325 80 746.4064 19.3663800 *
13) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=22.325 53 1249.1190 26.9230200 *
7) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=30.945 43 3773.8060 41.2923300
14) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 44.875 31 1291.8590 36.8500000 *
15) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=44.875 12 289.8000 52.7683300 *
# Detailed report of the tree: CP table, variable importance, and the
# primary/surrogate splits considered at each node.
summary(model_dt)
Call:
rpart(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~
., data = train_data, cp = 0.01)
n= 676
CP nsplit rel error xerror xstd
1 0.51084002 0 1.0000000 1.0012834 0.10703886
2 0.13348974 1 0.4891600 0.4978523 0.08186267
3 0.08319349 2 0.3556702 0.4712059 0.07754073
4 0.07830590 3 0.2724768 0.3861187 0.07048742
5 0.01675066 4 0.1941709 0.2312154 0.04720192
6 0.01476590 5 0.1774202 0.2093518 0.04821259
7 0.01226203 6 0.1626543 0.2075929 0.04823238
8 0.01170856 7 0.1503923 0.1948070 0.04803373
9 0.01000000 8 0.1386837 0.1860572 0.04808283
Variable importance
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP R.O.S. (Return on Sales)
58 21
R.O.E. (Return on Equity) ROT (Return On Turnover) FATTURATO / ATTIVO SP
20 1
Node number 1: 676 observations, complexity param=0.51084
mean=9.146213, MSE=219.6156
left son=2 (500 obs) right son=3 (176 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 15.07 to the left, improve=0.51084000, (0 missing)
R.O.E. (Return on Equity) < 15.81 to the left, improve=0.41513470, (0 missing)
R.O.S. (Return on Sales) < 6.385 to the left, improve=0.35622980, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.611753 to the left, improve=0.09955888, (0 missing)
Surrogate splits:
R.O.S. (Return on Sales) < 9.61 to the left, agree=0.861, adj=0.466, (0 split)
R.O.E. (Return on Equity) < 28.495 to the left, agree=0.855, adj=0.443, (0 split)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 7.420978 to the left, agree=0.741, adj=0.006, (0 split)
Node number 2: 500 observations, complexity param=0.1334897
mean=2.86208, MSE=106.8114
left son=4 (8 obs) right son=5 (492 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -28.435 to the left, improve=0.37108220, (0 missing)
R.O.S. (Return on Sales) < 0.26 to the left, improve=0.30635250, (0 missing)
R.O.E. (Return on Equity) < 3.965 to the left, improve=0.27261810, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 0.7025735 to the left, improve=0.05462503, (0 missing)
Node number 3: 176 observations, complexity param=0.0783059
mean=26.99886, MSE=109.1765
left son=6 (133 obs) right son=7 (43 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 30.945 to the left, improve=0.60501010, (0 missing)
R.O.S. (Return on Sales) < 12.795 to the left, improve=0.23792960, (0 missing)
R.O.E. (Return on Equity) < 39.165 to the left, improve=0.10273760, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.479734 to the left, improve=0.03835117, (0 missing)
Surrogate splits:
R.O.S. (Return on Sales) < 18.405 to the left, agree=0.807, adj=0.209, (0 split)
R.O.E. (Return on Equity) < 90.925 to the left, agree=0.778, adj=0.093, (0 split)
Node number 4: 8 observations
mean=-46.51, MSE=352.9727
Node number 5: 492 observations, complexity param=0.08319349
mean=3.664878, MSE=62.52848
left son=10 (271 obs) right son=11 (221 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 3.795 to the left, improve=0.40147290, (0 missing)
R.O.E. (Return on Equity) < 3.965 to the left, improve=0.34501190, (0 missing)
R.O.S. (Return on Sales) < 0.6 to the left, improve=0.33943730, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.611753 to the left, improve=0.07742249, (0 missing)
Surrogate splits:
R.O.E. (Return on Equity) < 4.39 to the left, agree=0.894, adj=0.765, (0 split)
R.O.S. (Return on Sales) < 3.35 to the left, agree=0.837, adj=0.638, (0 split)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.793141 to the left, agree=0.602, adj=0.113, (0 split)
Node number 6: 133 observations, complexity param=0.01226203
mean=22.37767, MSE=28.69134
left son=12 (80 obs) right son=13 (53 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 22.325 to the left, improve=0.47705630, (0 missing)
R.O.S. (Return on Sales) < 6.885 to the left, improve=0.12783850, (0 missing)
R.O.E. (Return on Equity) < 31.035 to the left, improve=0.05785069, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.263069 to the left, improve=0.02107806, (0 missing)
Surrogate splits:
R.O.S. (Return on Sales) < 13.33 to the left, agree=0.662, adj=0.151, (0 split)
R.O.E. (Return on Equity) < 84.38 to the left, agree=0.624, adj=0.057, (0 split)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.015029 to the right, agree=0.617, adj=0.038, (0 split)
Node number 7: 43 observations, complexity param=0.0147659
mean=41.29233, MSE=87.76293
left son=14 (31 obs) right son=15 (12 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 44.875 to the left, improve=0.58088490, (0 missing)
R.O.S. (Return on Sales) < 13.72 to the left, improve=0.22156580, (0 missing)
R.O.E. (Return on Equity) < 66.085 to the left, improve=0.17764150, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 2.440472 to the left, improve=0.03279907, (0 missing)
Node number 10: 271 observations, complexity param=0.01675066
mean=-0.8597048, MSE=49.43188
left son=20 (49 obs) right son=21 (222 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -4.845 to the left, improve=0.18563730, (0 missing)
R.O.S. (Return on Sales) < 0.26 to the left, improve=0.17634390, (0 missing)
R.O.E. (Return on Equity) < 0.06 to the left, improve=0.04779047, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.615666 to the left, improve=0.03614259, (0 missing)
Surrogate splits:
R.O.S. (Return on Sales) < -9.475 to the left, agree=0.904, adj=0.469, (0 split)
R.O.E. (Return on Equity) < -78.88 to the left, agree=0.841, adj=0.122, (0 split)
Node number 11: 221 observations, complexity param=0.01170856
mean=9.213122, MSE=22.70162
left son=22 (127 obs) right son=23 (94 obs)
Primary splits:
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 9.075 to the left, improve=0.34646890, (0 missing)
R.O.E. (Return on Equity) < 15.81 to the left, improve=0.17146310, (0 missing)
ROT (Return On Turnover) FATTURATO / ATTIVO SP < 0.8746952 to the left, improve=0.02946706, (0 missing)
R.O.S. (Return on Sales) < 13.165 to the left, improve=0.01056238, (0 missing)
Surrogate splits:
R.O.E. (Return on Equity) < 21.66 to the left, agree=0.692, adj=0.277, (0 split)
R.O.S. (Return on Sales) < 6.4 to the left, agree=0.629, adj=0.128, (0 split)
Node number 12: 80 observations
mean=19.36638, MSE=9.330081
Node number 13: 53 observations
mean=26.92302, MSE=23.56829
Node number 14: 31 observations
mean=36.85, MSE=41.67287
Node number 15: 12 observations
mean=52.76833, MSE=24.15
Node number 20: 49 observations
mean=-7.307551, MSE=24.89855
Node number 21: 222 observations
mean=0.5634685, MSE=43.64507
Node number 22: 127 observations
mean=6.800315, MSE=16.69807
Node number 23: 94 observations
mean=12.47298, MSE=12.32072
# Variable importance of the tree, on a log scale.
print(log(model_dt[["variable.importance"]]))
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP R.O.S. (Return on Sales)
11.758778 10.764507
R.O.E. (Return on Equity) ROT (Return On Turnover) FATTURATO / ATTIVO SP
10.714978 7.547904
# Draw the fitted tree, then show its complexity-parameter table.
rpart.plot(model_dt)
model_dt[["cptable"]]
CP nsplit rel error xerror xstd
1 0.51084002 0 1.0000000 1.0012834 0.10703886
2 0.13348974 1 0.4891600 0.4978523 0.08186267
3 0.08319349 2 0.3556702 0.4712059 0.07754073
4 0.07830590 3 0.2724768 0.3861187 0.07048742
5 0.01675066 4 0.1941709 0.2312154 0.04720192
6 0.01476590 5 0.1774202 0.2093518 0.04821259
7 0.01226203 6 0.1626543 0.2075929 0.04823238
8 0.01170856 7 0.1503923 0.1948070 0.04803373
9 0.01000000 8 0.1386837 0.1860572 0.04808283
install.packages("partykit")
apertura URL 'https://cran.rstudio.com/bin/macosx/contrib/4.2/partykit_1.2-20.tgz'
Content type 'application/x-gzip' length 2412379 bytes (2.3 MB)
==================================================
downloaded 2.3 MB
I pacchetti binari scaricati sono in
/var/folders/1m/gxy5wsh93qq112t_f6ddt48w0000gn/T//Rtmp6IaPTV/downloaded_packages
library(grid)
library(libcoin)
library(mvtnorm)
library(partykit)
# Convert the rpart fit into a party object (the same name is reused) and print it.
model_dt <- as.party(model_dt)
print(model_dt)
Model formula:
train_data$`R.O.I. Cerved (Return on Investment)` ~ `R.O.E. (Return on Equity)` +
`R.O.S. (Return on Sales)` + `ROT (Return On Turnover) FATTURATO / ATTIVO SP` +
`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`
Fitted party:
[1] root
| [2] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 15.07
| | [3] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -28.435: -46.510 (n = 8, err = 2823.8)
| | [4] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= -28.435
| | | [5] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 3.795
| | | | [6] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -4.845: -7.308 (n = 49, err = 1220.0)
| | | | [7] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= -4.845: 0.563 (n = 222, err = 9689.2)
| | | [8] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 3.795
| | | | [9] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 9.075: 6.800 (n = 127, err = 2120.7)
| | | | [10] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 9.075: 12.473 (n = 94, err = 1158.1)
| [11] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 15.07
| | [12] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 30.945
| | | [13] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 22.325: 19.366 (n = 80, err = 746.4)
| | | [14] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 22.325: 26.923 (n = 53, err = 1249.1)
| | [15] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 30.945
| | | [16] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 44.875: 36.850 (n = 31, err = 1291.9)
| | | [17] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 44.875: 52.768 (n = 12, err = 289.8)
Number of inner nodes: 8
Number of terminal nodes: 9
# Plot the party tree: inner nodes without p-values or ids, terminal nodes as
# boxplots labelled with their id.
plot(
  model_dt,
  inner_panel = node_inner(model_dt, pval = FALSE, id = FALSE),
  terminal_panel = node_boxplot(model_dt, id = TRUE)
)
# Tree predictions on the hold-out set.
# Despite its name, `std_err` holds the prediction error (observed - predicted).
newdata_dt <- data.frame(
  ROI_test = test_data$`R.O.I. Cerved (Return on Investment)`
)
newdata_dt$ROIpred <- predict(model_dt, newdata = test_data, type = "response")
newdata_dt$std_err <- newdata_dt$ROI_test - newdata_dt$ROIpred
newdata_dt
summary(newdata_dt)
ROI_test ROIpred std_err
Min. :-23.65 Min. :-7.3075 Min. :-17.81347
1st Qu.: 0.62 1st Qu.: 0.5635 1st Qu.: -2.54637
Median : 5.20 Median : 6.8003 Median : 0.03363
Mean : 10.09 Mean :10.3310 Mean : -0.23730
3rd Qu.: 18.43 3rd Qu.:19.3664 3rd Qu.: 2.42969
Max. : 65.15 Max. :52.7683 Max. : 23.31362
# Mean squared prediction error of the tree on the hold-out set.
mean(newdata_dt$std_err^2)
[1] 28.90742
# Prediction error of the tree for each hold-out observation, coloured by its value.
# The x axis follows the actual number of rows instead of a hard-coded 169, and
# the trace type/mode are explicit so plotly does not have to infer them.
plot_ly(
  data = newdata_dt,
  x = seq_len(nrow(newdata_dt)),
  y = newdata_dt$std_err,
  color = newdata_dt$std_err,
  type = "scatter",
  mode = "markers"
)
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# Analysis of the predictions of the regression (model2) on the hold-out set.
# Despite its name, `std_err` holds the prediction error (observed - predicted).
newdata_reg <- data.frame(
  ROI_test = test_data$`R.O.I. Cerved (Return on Investment)`
)
newdata_reg$ROI_pred <- predict(model2, newdata = test_data)
newdata_reg$std_err <- newdata_reg$ROI_test - newdata_reg$ROI_pred
newdata_reg
summary(newdata_reg)
ROI_test ROI_pred std_err
Min. :-23.65 Min. :-25.054 Min. :-20.3669
1st Qu.: 0.62 1st Qu.: 1.409 1st Qu.: -1.3373
Median : 5.20 Median : 6.104 Median : -0.1243
Mean : 10.09 Mean : 10.541 Mean : -0.4468
3rd Qu.: 18.43 3rd Qu.: 16.951 3rd Qu.: 0.8666
Max. : 65.15 Max. : 60.476 Max. : 26.3112
# Mean squared prediction error of the regression on the hold-out set.
mean(newdata_reg$std_err^2)
[1] 21.76269
# Prediction error of the regression for each hold-out observation, coloured by its value.
# The x axis follows the actual number of rows instead of a hard-coded 169, and
# the trace type/mode are explicit so plotly does not have to infer them.
plot_ly(
  data = newdata_reg,
  x = seq_len(nrow(newdata_reg)),
  y = newdata_reg$std_err,
  color = newdata_reg$std_err,
  type = "scatter",
  mode = "markers"
)
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
Setting the mode to markers
Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode